The purpose of this notebook is to find subclusters and annotate CD4 T cells with the feedback we received from the annotation team.
library(Seurat)
library(tidyverse)
# Paths
path_to_obj <- here::here("scRNA-seq/results/R_objects/level_3/CD4_T/CD4_T_clustered_level_3_with_pre_freeze.rds")
path_to_save <- here::here("scRNA-seq/results/R_objects/level_3/CD4_T/CD4_T_annotated_level_3.rds")
path_to_save_df <- here::here("scRNA-seq/3-clustering/3-level_3/tmp/CD4_T/CD4_T_annotation_level_3_df.rds")
# Colors
color_palette <- c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", "#5A5156", "#AA0DFE",
"#F8A19F", "#F7E1A0", "#1C8356", "#FEAF16", "#822E1C", "#C4451C",
"#1CBE4F", "#325A9B", "#F6222E", "#FE00FA", "#FBE426", "#16FF32",
"black", "#3283FE", "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",
"#90AD1C", "#FE00FA", "#85660D", "#3B00FB", "#822E1C", "coral2",
"#1CFFCE", "#1CBE4F", "#3283FE", "#FBE426", "#F7E1A0", "#325A9B",
"#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD", "#FEAF16", "#683B79",
"#B10DA1", "#1C7F93", "#F8A19F", "dark orange", "#FEAF16", "#FBE426",
"Brown")
# Functions
source(here::here("scRNA-seq/bin/utils.R"))
# Additional function
plot_subcluster <- function(seurat_obj, pattern) {
p <- seurat_obj@reductions$umap@cell.embeddings %>%
as.data.frame() %>%
mutate(cluster = seurat_obj$annotation_level_3) %>%
filter(str_detect(cluster, pattern)) %>%
ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
geom_point(size = 0.1) +
theme_classic()
p
}
# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 37378 features across 78974 samples within 1 assay
## Active assay: RNA (37378 features, 0 variable features)
## 3 dimensional reductions calculated: pca, umap, harmony
DimPlot(seurat, cols = color_palette, pt.size = 0.2)
seurat$annotation_level_3 <- seurat$seurat_clusters
Idents(seurat) <- "annotation_level_3"
seurat <- FindSubCluster(
seurat,
cluster = "1",
graph.name = "RNA_nn",
resolution = 0.25,
subcluster.name = "annotation_level_3"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11263
## Number of edges: 102967
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7959
## Number of communities: 20
## Elapsed time: 0 seconds
Idents(seurat) <- "annotation_level_3"
DimPlot(seurat, cols = color_palette, pt.size = 0.2)
plot_subcluster(seurat, "^1_")
Markers:
clusters_1 <- seurat$annotation_level_3 %>%
unique() %>%
str_subset("^1_") %>%
sort()
markers_1 <- purrr::map(clusters_1, function(x) {
group_1 <- clusters_1[which(clusters_1 == x)]
group_2 <- clusters_1[which(clusters_1 != x)]
df <- FindMarkers(
seurat,
ident.1 = group_1,
ident.2 = group_2,
only.pos = TRUE,
logfc.threshold = 0.5,
verbose = TRUE
)
df <- df %>%
rownames_to_column(var = "gene") %>%
arrange(desc(avg_log2FC))
df
})
names(markers_1) <- clusters_1
DT::datatable(markers_1$`1_0`)
DT::datatable(markers_1$`1_1`)
seurat <- FindSubCluster(
seurat,
cluster = "5",
graph.name = "RNA_snn",
resolution = 0.3,
subcluster.name = "annotation_level_3"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5335
## Number of edges: 163910
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8391
## Number of communities: 4
## Elapsed time: 0 seconds
Idents(seurat) <- "annotation_level_3"
DimPlot(seurat, cols = color_palette, pt.size = 0.2)
plot_subcluster(seurat, "^5_")
Markers:
clusters_5 <- seurat$annotation_level_3 %>%
unique() %>%
str_subset("^5_") %>%
sort()
markers_5 <- purrr::map(clusters_5, function(x) {
group_1 <- clusters_5[which(clusters_5 == x)]
group_2 <- clusters_5[which(clusters_5 != x)]
df <- FindMarkers(
seurat,
ident.1 = group_1,
ident.2 = group_2,
only.pos = TRUE,
logfc.threshold = 0.5,
verbose = TRUE
)
df <- df %>%
rownames_to_column(var = "gene") %>%
arrange(desc(avg_log2FC))
df
})
names(markers_5) <- clusters_5
DT::datatable(markers_5$`5_0`)
DT::datatable(markers_5$`5_1`)
DT::datatable(markers_5$`5_2`)
DT::datatable(markers_5$`5_3`)
seurat <- FindSubCluster(
seurat,
cluster = "9",
graph.name = "RNA_snn",
resolution = 0.15,
subcluster.name = "annotation_level_3"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3832
## Number of edges: 100622
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9064
## Number of communities: 4
## Elapsed time: 0 seconds
Idents(seurat) <- "annotation_level_3"
DimPlot(seurat, cols = color_palette, pt.size = 0.2)
plot_subcluster(seurat, "^9_")
Markers:
clusters_9 <- seurat$annotation_level_3 %>%
unique() %>%
str_subset("^9_") %>%
sort()
markers_9 <- purrr::map(clusters_9, function(x) {
group_1 <- clusters_9[which(clusters_9 == x)]
group_2 <- clusters_9[which(clusters_9 != x)]
print(group_1)
print(group_2)
df <- FindMarkers(
seurat,
ident.1 = group_1,
ident.2 = group_2,
only.pos = TRUE,
logfc.threshold = 0.5,
verbose = TRUE
)
df <- df %>%
rownames_to_column(var = "gene") %>%
arrange(desc(avg_log2FC))
df
})
## [1] "9_0"
## [1] "9_1" "9_2" "9_3"
## [1] "9_1"
## [1] "9_0" "9_2" "9_3"
## [1] "9_2"
## [1] "9_0" "9_1" "9_3"
## [1] "9_3"
## [1] "9_0" "9_1" "9_2"
names(markers_9) <- clusters_9
DT::datatable(markers_9$`9_0`)
DT::datatable(markers_9$`9_1`)
DT::datatable(markers_9$`9_2`)
DT::datatable(markers_9$`9_3`)
seurat <- FindSubCluster(
seurat,
cluster = "11",
graph.name = "RNA_nn",
resolution = 0.25,
subcluster.name = "annotation_level_3"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2230
## Number of edges: 22679
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7868
## Number of communities: 4
## Elapsed time: 0 seconds
Idents(seurat) <- "annotation_level_3"
DimPlot(seurat, cols = color_palette, pt.size = 0.2)
plot_subcluster(seurat, "^11_")
Markers:
clusters_11 <- seurat$annotation_level_3 %>%
unique() %>%
str_subset("^11_") %>%
sort()
markers_11 <- purrr::map(clusters_11, function(x) {
group_1 <- clusters_11[which(clusters_11 == x)]
group_2 <- clusters_11[which(clusters_11 != x)]
df <- FindMarkers(
seurat,
ident.1 = group_1,
ident.2 = group_2,
only.pos = TRUE,
logfc.threshold = 0.5,
verbose = TRUE
)
df <- df %>%
rownames_to_column(var = "gene") %>%
arrange(desc(avg_log2FC))
df
})
names(markers_11) <- clusters_11
DT::datatable(markers_11$`11_0`)
DT::datatable(markers_11$`11_1`)
annotation_level_3 <- c(
"0" = "Naive",
"1_0" = "Follicular Th CXCL13+CBLB+",
"1_1" = "Follicular Th TOX2+",
"2" = "Follicular Th CXCR5+",
"3" = "Mitochondrial+ T cells",
"4" = "Central Mem PASK-",
"5_0" = "IL2RA+FOXP3+ Treg",
"5_1" = "Th17",
"5_2" = "Memory T cells",
"5_3" = "Doublets",
"6" = "CD8",
"7" = "Central Mem PASK+",
"8" = "Doublets",
"9_0" = "CD200+TOX+",
"9_1" = "Naive",
"9_2" = "metabolic/poor-quality",
"9_3" = "activated CD4 T",
"10" = "CD4 Non-Tfh KLRB1+ ",
"11_0" = "naive Treg IKZF2+",
"11_1" = "Treg IKZF2+HPGD+",
"12" = "Proliferative",
"13" = "Proliferative",
"14" = "poor-quality",
"15" = "poor-quality"
)
seurat$annotation_level_3 <- annotation_level_3[seurat$annotation_level_3]
DimPlot(
seurat,
group.by = "annotation_level_3",
cols = color_palette,
pt.size = 0.2
)
saveRDS(seurat, path_to_save)
seurat$cell_barcode <- colnames(seurat)
df <- seurat@meta.data[, c("cell_barcode", "annotation_level_3")]
saveRDS(df, path_to_save_df)
sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
##
## Matrix products: default
## BLAS: /apps/R/4.0.1/lib64/R/lib/libRblas.so
## LAPACK: /apps/R/4.0.1/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.4 purrr_0.3.4 readr_1.4.0 tidyr_1.1.2 tibble_3.0.6 ggplot2_3.3.3 tidyverse_1.3.0 SeuratObject_4.0.0 Seurat_4.0.0 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 splines_4.0.1 crosstalk_1.1.1 listenv_0.8.0 scattermore_0.7 digest_0.6.27 htmltools_0.5.1.1 fansi_0.4.2 magrittr_2.0.1 tensor_1.5 cluster_2.1.0 ROCR_1.0-11 globals_0.14.0 modelr_0.1.8 matrixStats_0.58.0 colorspace_2.0-0 rvest_0.3.6 ggrepel_0.9.1 haven_2.3.1 xfun_0.21 crayon_1.4.1 jsonlite_1.7.2 spatstat_1.64-1 spatstat.data_2.0-0 survival_3.2-3 zoo_1.8-8 glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.7 future.apply_1.7.0 abind_1.4-5 scales_1.1.1 DBI_1.1.1 miniUI_0.1.1.1 Rcpp_1.0.6 viridisLite_0.3.0 xtable_1.8-4 reticulate_1.18 DT_0.13 htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.1 ica_1.0-2 pkgconfig_2.0.3 farver_2.0.3 sass_0.3.1 uwot_0.1.10 dbplyr_2.1.0 deldir_0.2-10 utf8_1.1.4
## [57] here_1.0.1 tidyselect_1.1.0 labeling_0.4.2 rlang_0.4.10 reshape2_1.4.4 later_1.1.0.1 munsell_0.5.0 cellranger_1.1.0 tools_4.0.1 cli_2.3.1 generics_0.1.0 broom_0.7.4 ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2 knitr_1.31 fs_1.5.0 fitdistrplus_1.1-3 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-148 mime_0.10 xml2_1.3.2 compiler_4.0.1 rstudioapi_0.13 plotly_4.9.3 png_0.1-7 spatstat.utils_2.0-0 reprex_1.0.0 bslib_0.2.4 stringi_1.5.3 highr_0.8 ps_1.5.0 lattice_0.20-41 Matrix_1.2-18 vctrs_0.3.6 pillar_1.5.0 lifecycle_1.0.0 BiocManager_1.30.10 lmtest_0.9-38 jquerylib_0.1.3 RcppAnnoy_0.0.18 data.table_1.14.0 cowplot_1.1.1 irlba_2.3.3 httpuv_1.5.5 patchwork_1.1.1 R6_2.5.0 bookdown_0.21 promises_1.2.0.1 KernSmooth_2.23-17 gridExtra_2.3 parallelly_1.23.0
## [113] codetools_0.2-16 MASS_7.3-51.6 assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.1 sctransform_0.3.2 mgcv_1.8-31 parallel_4.0.1 hms_1.0.0 grid_4.0.1 rpart_4.1-15 rmarkdown_2.7 Rtsne_0.15 shiny_1.6.0 lubridate_1.7.9.2